	
	**************** 
	** Clean UN Population projections (all countries)
	****************
	
	** Current Estimates
		import excel using "$dir/rawdata/UN Population Prospects/WPP2022_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", clear firstrow sheet("Estimates") cellrange(A17:AF20541) case(lower)

		gen countryregion=regionsubregioncountryorar
		drop index variant regionsubregioncountryorar notes locationcode iso3alphacode iso2alphacode sdmxcode parentcode
		order countryregion
		
		drop if countryregion=="UN development groups"		
		drop if countryregion=="Geographic regions"		
		drop if countryregion=="World Bank income groups"		
		drop if countryregion=="Sustainable Development Goal (SDG) regions"				
		replace countryregion="World" if countryregion=="WORLD"
		replace countryregion="Tanzania" if countryregion=="United Republic of Tanzania"

		foreach var of varlist l-af {
		   local newvar: variable label `var'
		   local newvar = subinstr("`newvar'","-","to",.)
		   local newvar = subinstr("`newvar'","+","plus",.)
		   rename `var' pop_`newvar'
		}
		
		destring pop_*, replace
		
		** total poplation and population above certain ages
		egen pop_total=rowtotal(pop_*)
		egen pop_60plus=rowtotal(pop_60to64 pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_65plus=rowtotal(pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_70plus=rowtotal(pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_75plus=rowtotal(pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		
		** below 15 and 15-64 for dependency ratios
		egen pop_below15=rowtotal(pop_0to4 pop_5to9 pop_10to14)
		egen pop_15to59=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59)
		egen pop_15to64=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59 pop_60to64)
		
		** depedency ratio
		gen depratio_15to59=(pop_below15+pop_60plus)/pop_15to59
		gen depratio_15to64=(pop_below15+pop_65plus)/pop_15to64
		
		gen oldagedepratio_15to59=pop_60plus/pop_15to59
		gen oldagedepratio_15to64=pop_65plus/pop_15to64
		
		tempfile estimates
		save `estimates'
	
	** Projections
		import excel using "$dir/rawdata/UN Population Prospects/WPP2022_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", clear firstrow sheet("Medium variant") cellrange(A17:AF22536) case(lower)
		
		gen countryregion=regionsubregioncountryorar
		drop index variant regionsubregioncountryorar notes locationcode iso3alphacode iso2alphacode sdmxcode parentcode
		order countryregion

		drop if countryregion=="UN development groups"		
		drop if countryregion=="Geographic regions"		
		drop if countryregion=="World Bank income groups"		
		drop if countryregion=="Sustainable Development Goal (SDG) regions"				
		replace countryregion="World" if countryregion=="WORLD"
		replace countryregion="Tanzania" if countryregion=="United Republic of Tanzania"

		foreach var of varlist l-af {
		   local newvar: variable label `var'
		   local newvar = subinstr("`newvar'","-","to",.)
		   local newvar = subinstr("`newvar'","+","plus",.)
		   rename `var' pop_`newvar'
		}
		
		destring pop_*, replace
		
		** total poplation and population above certain ages
		egen pop_total=rowtotal(pop_*)
		egen pop_60plus=rowtotal(pop_60to64 pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_65plus=rowtotal(pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_70plus=rowtotal(pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_75plus=rowtotal(pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
			
		** below 15 and 15-64 for dependency ratios
		egen pop_below15=rowtotal(pop_0to4 pop_5to9 pop_10to14)
		egen pop_15to59=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59)
		egen pop_15to64=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59 pop_60to64)
		
		** depedency ratio
		gen depratio_15to59=(pop_below15+pop_60plus)/pop_15to59
		gen depratio_15to64=(pop_below15+pop_65plus)/pop_15to64
		
		gen oldagedepratio_15to59=pop_60plus/pop_15to59
		gen oldagedepratio_15to64=pop_65plus/pop_15to64
		
	** Append projections and estimates
	
		append using `estimates'
		
		// tab year
		// years available are from 1950 to 2100
		// use 2020 as "today"
		// last 30 years would be 1990-2020
		// next 30 years would be 2020-2050
		// adding 2100
		
	** Reshape to one observation 
		
		local vars "pop_total pop_60plus pop_65plus pop_70plus pop_75plus pop_below15 pop_15to59 pop_15to64 depratio_15to59 depratio_15to64 oldagedepratio_15to59 oldagedepratio_15to64"
		local vars_pop "pop_total pop_60plus pop_65plus pop_70plus pop_75plus pop_below15 pop_15to59 pop_15to64"
		
		keep countryregion type year `vars'
		
		duplicates drop

		keep if year==1990 | year==2020 | year==2050 | year==2100
		
		foreach var in `vars' {
			ren `var' `var'_
		}
		reshape wide `vars', i(countryregion type) j(year)
		
	** Growth rates
		ren pop_total* pop_55plus* // temp for the loop
		forvalues age=55(5)75 {
			gen growth_pop_`age'plus_1990to2020=(pop_`age'plus_2020-pop_`age'plus_1990)/pop_`age'plus_1990*100
			gen growth_pop_`age'plus_2020to2050=(pop_`age'plus_2050-pop_`age'plus_2020)/pop_`age'plus_2020*100
			gen growth_pop_`age'plus_2050to2100=(pop_`age'plus_2100-pop_`age'plus_2050)/pop_`age'plus_2050*100
			gen growth_pop_`age'plus_2020to2100=(pop_`age'plus_2100-pop_`age'plus_2020)/pop_`age'plus_2020*100
		}
		ren *55plus* *total*
		
	** Save
		save "$dir/data/population_projections_allcountries.dta", replace
